Lab4 Homework

Author

Kyra Guy

1.

if (!file.exists("met_all.gz"))
  download.file(
    url = "https://raw.githubusercontent.com/USCbiostats/data-science-data/master/02_met/met_all.gz",
    destfile = "met_all.gz",
    method   = "libcurl",
    timeout  = 60
    )
met <- data.table::fread("met_all.gz")

Remove temps less than -17C, make sure there are no missing values

met <- met[met$temp > -17][elev == 9999.0, elev := NA][wind.sp == 9999.0, wind.sp := NA][dew.point == 9999.0, dew.point := NA][rh == 9999.0, rh := NA]

Date variable

met$date_variable <- as.Date(paste(met$year, met$month, met$day, sep = "-"))
print(met) 
         USAFID  WBAN year month day hour min    lat      lon elev wind.dir
      1: 690150 93121 2019     8   1    0  56 34.300 -116.166  696      220
      2: 690150 93121 2019     8   1    1  56 34.300 -116.166  696      230
      3: 690150 93121 2019     8   1    2  56 34.300 -116.166  696      230
      4: 690150 93121 2019     8   1    3  56 34.300 -116.166  696      210
      5: 690150 93121 2019     8   1    4  56 34.300 -116.166  696      120
     ---                                                                   
2317200: 726813 94195 2019     8  31   19  56 43.650 -116.633  741       70
2317201: 726813 94195 2019     8  31   20  56 43.650 -116.633  741       NA
2317202: 726813 94195 2019     8  31   21  56 43.650 -116.633  741       10
2317203: 726813 94195 2019     8  31   22  56 43.642 -116.636  741       10
2317204: 726813 94195 2019     8  31   23  56 43.642 -116.636  741       40
         wind.dir.qc wind.type.code wind.sp wind.sp.qc ceiling.ht ceiling.ht.qc
      1:           5              N     5.7          5      22000             5
      2:           5              N     8.2          5      22000             5
      3:           5              N     6.7          5      22000             5
      4:           5              N     5.1          5      22000             5
      5:           5              N     2.1          5      22000             5
     ---                                                                       
2317200:           5              N     2.1          5      22000             5
2317201:           9              C     0.0          5      22000             5
2317202:           5              N     2.6          5      22000             5
2317203:           1              N     2.1          1      22000             1
2317204:           1              N     2.1          1      22000             1
         ceiling.ht.method sky.cond vis.dist vis.dist.qc vis.var vis.var.qc
      1:                 9        N    16093           5       N          5
      2:                 9        N    16093           5       N          5
      3:                 9        N    16093           5       N          5
      4:                 9        N    16093           5       N          5
      5:                 9        N    16093           5       N          5
     ---                                                                   
2317200:                 9        N    16093           5       N          5
2317201:                 9        N    16093           5       N          5
2317202:                 9        N    14484           5       N          5
2317203:                 9        N    16093           1       9          9
2317204:                 9        N    16093           1       9          9
         temp temp.qc dew.point dew.point.qc atm.press atm.press.qc       rh
      1: 37.2       5      10.6            5    1009.9            5 19.88127
      2: 35.6       5      10.6            5    1010.3            5 21.76098
      3: 34.4       5       7.2            5    1010.6            5 18.48212
      4: 33.3       5       5.0            5    1011.6            5 16.88862
      5: 32.8       5       5.0            5    1012.7            5 17.38410
     ---                                                                    
2317200: 32.2       5      12.2            5    1012.8            5 29.40686
2317201: 33.3       5      12.2            5    1011.6            5 27.60422
2317202: 35.0       5       9.4            5    1010.8            5 20.76325
2317203: 34.4       1       9.4            1    1010.1            1 21.48631
2317204: 34.4       1       9.4            1    1009.6            1 21.48631
         date_variable
      1:    2019-08-01
      2:    2019-08-01
      3:    2019-08-01
      4:    2019-08-01
      5:    2019-08-01
     ---              
2317200:    2019-08-31
2317201:    2019-08-31
2317202:    2019-08-31
2317203:    2019-08-31
2317204:    2019-08-31

Keep the first week of the month

met$weeknumber <- data.table::week(as.Date(paste(met$year, met$month, met$day, sep = "-")))
print(met)
         USAFID  WBAN year month day hour min    lat      lon elev wind.dir
      1: 690150 93121 2019     8   1    0  56 34.300 -116.166  696      220
      2: 690150 93121 2019     8   1    1  56 34.300 -116.166  696      230
      3: 690150 93121 2019     8   1    2  56 34.300 -116.166  696      230
      4: 690150 93121 2019     8   1    3  56 34.300 -116.166  696      210
      5: 690150 93121 2019     8   1    4  56 34.300 -116.166  696      120
     ---                                                                   
2317200: 726813 94195 2019     8  31   19  56 43.650 -116.633  741       70
2317201: 726813 94195 2019     8  31   20  56 43.650 -116.633  741       NA
2317202: 726813 94195 2019     8  31   21  56 43.650 -116.633  741       10
2317203: 726813 94195 2019     8  31   22  56 43.642 -116.636  741       10
2317204: 726813 94195 2019     8  31   23  56 43.642 -116.636  741       40
         wind.dir.qc wind.type.code wind.sp wind.sp.qc ceiling.ht ceiling.ht.qc
      1:           5              N     5.7          5      22000             5
      2:           5              N     8.2          5      22000             5
      3:           5              N     6.7          5      22000             5
      4:           5              N     5.1          5      22000             5
      5:           5              N     2.1          5      22000             5
     ---                                                                       
2317200:           5              N     2.1          5      22000             5
2317201:           9              C     0.0          5      22000             5
2317202:           5              N     2.6          5      22000             5
2317203:           1              N     2.1          1      22000             1
2317204:           1              N     2.1          1      22000             1
         ceiling.ht.method sky.cond vis.dist vis.dist.qc vis.var vis.var.qc
      1:                 9        N    16093           5       N          5
      2:                 9        N    16093           5       N          5
      3:                 9        N    16093           5       N          5
      4:                 9        N    16093           5       N          5
      5:                 9        N    16093           5       N          5
     ---                                                                   
2317200:                 9        N    16093           5       N          5
2317201:                 9        N    16093           5       N          5
2317202:                 9        N    14484           5       N          5
2317203:                 9        N    16093           1       9          9
2317204:                 9        N    16093           1       9          9
         temp temp.qc dew.point dew.point.qc atm.press atm.press.qc       rh
      1: 37.2       5      10.6            5    1009.9            5 19.88127
      2: 35.6       5      10.6            5    1010.3            5 21.76098
      3: 34.4       5       7.2            5    1010.6            5 18.48212
      4: 33.3       5       5.0            5    1011.6            5 16.88862
      5: 32.8       5       5.0            5    1012.7            5 17.38410
     ---                                                                    
2317200: 32.2       5      12.2            5    1012.8            5 29.40686
2317201: 33.3       5      12.2            5    1011.6            5 27.60422
2317202: 35.0       5       9.4            5    1010.8            5 20.76325
2317203: 34.4       1       9.4            1    1010.1            1 21.48631
2317204: 34.4       1       9.4            1    1009.6            1 21.48631
         date_variable weeknumber
      1:    2019-08-01         31
      2:    2019-08-01         31
      3:    2019-08-01         31
      4:    2019-08-01         31
      5:    2019-08-01         31
     ---                         
2317200:    2019-08-31         35
2317201:    2019-08-31         35
2317202:    2019-08-31         35
2317203:    2019-08-31         35
2317204:    2019-08-31         35

Calculate averages

met_avg <- met[,.(
  temp     = mean(temp,na.rm=TRUE),
  rh       = mean(rh,na.rm=TRUE),
  wind.sp  = mean(wind.sp,na.rm=TRUE),
  vis.dist = mean(vis.dist,na.rm=TRUE),
  lat      = mean(lat),
  lon      = mean(lon), 
  elev     = mean(elev,na.rm=TRUE),
  dew.point = mean(dew.point,na.rm=TRUE)
), by=c("USAFID", "day")]

met_avg
       USAFID day     temp       rh  wind.sp vis.dist      lat       lon
    1: 690150   1 32.59583 20.65546 2.895833 15958.92 34.29967 -116.1657
    2: 690150   2 33.54167 13.18436 4.154167 16093.00 34.30000 -116.1660
    3: 690150   3 34.30833 14.20129 4.433333 16025.96 34.30000 -116.1660
    4: 690150   4 34.72083 11.70307 4.245833 16093.00 34.30000 -116.1660
    5: 690150   5 34.77391 13.20827 3.660870 15953.09 34.30000 -116.1660
   ---                                                                  
48714: 726813  27 20.05833 44.96461 1.579167 15891.88 43.65000 -116.6330
48715: 726813  28 21.18333 46.79024 1.175000 15623.67 43.65000 -116.6330
48716: 726813  29 24.32083 49.07651 1.733333 16025.96 43.65000 -116.6330
48717: 726813  30 24.54583 50.43012 3.508333 16025.96 43.64967 -116.6331
48718: 726813  31 24.27917 51.08570 1.208333 16025.96 43.64933 -116.6332
           elev  dew.point
    1: 690.0833  6.3000000
    2: 696.0000  1.2166667
    3: 696.0000  2.4958333
    4: 696.0000  0.5541667
    5: 696.0000  1.4956522
   ---                    
48714: 741.0000  5.9375000
48715: 741.0000  6.8041667
48716: 741.0000 10.8083333
48717: 741.0000 12.3083333
48718: 741.0000 11.6875000

Create region variable

met_avg$Regionlat <- cut(met_avg$lat, breaks=c(24.55525, 39.71 , 48.941), labels=c('N' , 'S')) 
met_avg$Regionlon <- cut(met_avg$lon, breaks=c(-124.29, -98.00 , -68.313), labels=c('E' , 'W'))

met_avg$Region<- paste(met_avg$Regionlat, met_avg$Regionlon, sep = "")
print(met_avg)
       USAFID day     temp       rh  wind.sp vis.dist      lat       lon
    1: 690150   1 32.59583 20.65546 2.895833 15958.92 34.29967 -116.1657
    2: 690150   2 33.54167 13.18436 4.154167 16093.00 34.30000 -116.1660
    3: 690150   3 34.30833 14.20129 4.433333 16025.96 34.30000 -116.1660
    4: 690150   4 34.72083 11.70307 4.245833 16093.00 34.30000 -116.1660
    5: 690150   5 34.77391 13.20827 3.660870 15953.09 34.30000 -116.1660
   ---                                                                  
48714: 726813  27 20.05833 44.96461 1.579167 15891.88 43.65000 -116.6330
48715: 726813  28 21.18333 46.79024 1.175000 15623.67 43.65000 -116.6330
48716: 726813  29 24.32083 49.07651 1.733333 16025.96 43.65000 -116.6330
48717: 726813  30 24.54583 50.43012 3.508333 16025.96 43.64967 -116.6331
48718: 726813  31 24.27917 51.08570 1.208333 16025.96 43.64933 -116.6332
           elev  dew.point Regionlat Regionlon Region
    1: 690.0833  6.3000000         N         E     NE
    2: 696.0000  1.2166667         N         E     NE
    3: 696.0000  2.4958333         N         E     NE
    4: 696.0000  0.5541667         N         E     NE
    5: 696.0000  1.4956522         N         E     NE
   ---                                               
48714: 741.0000  5.9375000         S         E     SE
48715: 741.0000  6.8041667         S         E     SE
48716: 741.0000 10.8083333         S         E     SE
48717: 741.0000 12.3083333         S         E     SE
48718: 741.0000 11.6875000         S         E     SE

Categorical variable for elevation

met_avg[, elev_cat := ifelse(elev > 252, "high", "low")]
met_avg[, vis_cat  := cut(
  x      = vis.dist,
  breaks = c(0, 1000, 6000, 10000, Inf),
  labels = c("fog", "mist", "haze", "clear"),
  right  = FALSE
)]

print(met_avg)
       USAFID day     temp       rh  wind.sp vis.dist      lat       lon
    1: 690150   1 32.59583 20.65546 2.895833 15958.92 34.29967 -116.1657
    2: 690150   2 33.54167 13.18436 4.154167 16093.00 34.30000 -116.1660
    3: 690150   3 34.30833 14.20129 4.433333 16025.96 34.30000 -116.1660
    4: 690150   4 34.72083 11.70307 4.245833 16093.00 34.30000 -116.1660
    5: 690150   5 34.77391 13.20827 3.660870 15953.09 34.30000 -116.1660
   ---                                                                  
48714: 726813  27 20.05833 44.96461 1.579167 15891.88 43.65000 -116.6330
48715: 726813  28 21.18333 46.79024 1.175000 15623.67 43.65000 -116.6330
48716: 726813  29 24.32083 49.07651 1.733333 16025.96 43.65000 -116.6330
48717: 726813  30 24.54583 50.43012 3.508333 16025.96 43.64967 -116.6331
48718: 726813  31 24.27917 51.08570 1.208333 16025.96 43.64933 -116.6332
           elev  dew.point Regionlat Regionlon Region elev_cat vis_cat
    1: 690.0833  6.3000000         N         E     NE     high   clear
    2: 696.0000  1.2166667         N         E     NE     high   clear
    3: 696.0000  2.4958333         N         E     NE     high   clear
    4: 696.0000  0.5541667         N         E     NE     high   clear
    5: 696.0000  1.4956522         N         E     NE     high   clear
   ---                                                                
48714: 741.0000  5.9375000         S         E     SE     high   clear
48715: 741.0000  6.8041667         S         E     SE     high   clear
48716: 741.0000 10.8083333         S         E     SE     high   clear
48717: 741.0000 12.3083333         S         E     SE     high   clear
48718: 741.0000 11.6875000         S         E     SE     high   clear

3.

Geom_violin to examine wind speed and dew point by region

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(magrittr)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.4
✔ ggplot2   3.4.3     ✔ stringr   1.5.0
✔ lubridate 1.9.2     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::extract()   masks magrittr::extract()
✖ dplyr::filter()    masks stats::filter()
✖ dplyr::lag()       masks stats::lag()
✖ purrr::set_names() masks magrittr::set_names()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(data.table)

Attaching package: 'data.table'

The following objects are masked from 'package:lubridate':

    hour, isoweek, mday, minute, month, quarter, second, wday, week,
    yday, year

The following object is masked from 'package:purrr':

    transpose

The following objects are masked from 'package:dplyr':

    between, first, last
met_avg[!is.na(dew.point)] %>%
     ggplot() +
     geom_violin(mapping = aes(x = 1, y = dew.point), fill = "green")

 met_avg[!is.na(dew.point)][!is.na(wind.sp)] %>%
     ggplot(aes(x = Region)) +  
     geom_violin(aes(y = wind.sp, fill = "Wind Speed"), width = 0.7) +  
     geom_violin(aes(y = dew.point, fill = "Dew Point"), width = 0.7) +  
     labs(title = "Wind Speed and Dew Point by Region", y = "Values", fill = NULL) +
     theme_minimal() +
     theme(axis.text.x = element_blank()) +
     scale_fill_manual(values = c("lightblue", "lightgreen")) 

4.

Geom_jitter and stat_smooth to examine wind speed and dew point by region

met_avg[!is.na(dew.point)][!is.na(wind.sp)] %>%
     ggplot(aes(x = dew.point, y = wind.sp, color = Region)) +
     geom_jitter(width = 0.2, height = 0.2, alpha = 0.5) +  
     stat_smooth(method = "lm", se = FALSE) +  
     labs(title = "Association between Dew Point and Wind Speed by Region", 
          x = "Dew Point", y = "Wind Speed") +
     scale_color_brewer(palette = "Set1") +  
     theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

5.

Barcharts of weather stations by elevation category colored by region

met_avg[!is.na(dew.point)][!is.na(wind.sp)] %>%
     ggplot(aes(x = elev_cat, fill = Region)) +  
     geom_bar(position = "dodge") +
     scale_fill_brewer(palette = "Set1") +
     labs(title = "Weather Stations by Elevation Category", x = "Elevation Category", y = "Count") +
     theme_minimal() +
     theme(axis.text.x = element_text(angle = 45, hjust = 1))

6.

Stat Summary to examine mean dew point and wind speed by region with standard deviation error bars 

met_avg[!is.na(dew.point)][!is.na(wind.sp)] %>%
     ggplot() +
     stat_summary(
         mapping = aes(x = Region, y = dew.point),
         fun.data = "mean_sdl",
         fun.args = list(mult = 1),
         geom = "errorbar",
         position = position_dodge(width = 0.7),
         width = 0.2,
         color = "blue"  # Color for dew point error bars
     ) +
     stat_summary(
         mapping = aes(x = Region, y = wind.sp),
         fun.data = "mean_sdl",
         fun.args = list(mult = 1),
         geom = "errorbar",
         position = position_dodge(width = 0.4),
         width = 0.2,
         color = "green"  # Color for wind speed error bars
     ) +
     labs(
         title = "Mean Dew Point and Wind Speed by Region",
         x = "Region",
         y = "Value"
     ) +
     theme_minimal()

Dew point is higher in the Northwest

7.

Make a map showing the spatial trend in relative humidity in the US

library(leaflet)
met_avg3 <- subset(met_avg,select = c("USAFID", "rh","lat", "lon"))
met_avg3 <- na.omit(met_avg3)
rh.pal <- colorNumeric(c('darkgreen','goldenrod','brown'), domain=met_avg3$rh)
rh.pal
function (x) 
{
    if (length(x) == 0 || all(is.na(x))) {
        return(pf(x))
    }
    if (is.null(rng)) 
        rng <- range(x, na.rm = TRUE)
    rescaled <- scales::rescale(x, from = rng)
    if (any(rescaled < 0 | rescaled > 1, na.rm = TRUE)) 
        warning("Some values were outside the color scale and will be treated as NA")
    if (reverse) {
        rescaled <- 1 - rescaled
    }
    pf(rescaled)
}
<bytecode: 0x11c522530>
<environment: 0x11c520f50>
attr(,"colorType")
[1] "numeric"
attr(,"colorArgs")
attr(,"colorArgs")$na.color
[1] "#808080"
rhmap <- leaflet(met_avg3) %>% 
          # The looks of the Map
          addProviderTiles('CartoDB.Positron') %>% 
          # Some circles
          addCircles(
                  lat = ~lat, lng=~lon,
                      label = ~paste0(round(rh,2), ' C'), color = ~ rh.pal(rh),
                  opacity = 1, fillOpacity = 1, radius = 500
              ) %>%
          addLegend('bottomleft', pal = rh.pal, values=met_avg3$rh, 
                                    title='Relative Humidity, C', opacity=1)
print(rhmap)
rhmap

It is hotter on the coasts.

Use a ggplot extension: gganimate

library(ggplot2)
library(gganimate)
ggplot(met_avg, aes(factor(elev_cat), dew.point)) + 
  geom_boxplot()  +
  transition_states(
    elev_cat,
    transition_length = 2,
    state_length = 1
  ) +
  enter_fade() + 
  exit_shrink() +
  ease_aes('sine-in-out')
Warning: Removed 20 rows containing non-finite values (`stat_boxplot()`).